knitr::opts_chunk$set(fig.align="center") 
library(rstanarm)
library(tidyverse)
library(tidybayes)
library(modelr) 
library(ggplot2)
library(magrittr)  
library(emmeans)
library(bayesplot)
library(brms)
library(gganimate)

# Global plotting theme and constants shared by every analysis below.
theme_set(theme_light())
# NOTE(review): task_list is not referenced in the visible part of this file.
task_list <- c("3. Prediction", "4. Exploration")
# Seed reused for model fitting and for drawing posterior samples.
seed <- 12

Number of Interacted Variable Sets

Read in and format data

# Load the interacted-variable-set counts and convert the design and grouping
# columns to factors so the model treats them as categorical predictors.
interacted_var_sets_data <- read.csv(
  "split_by_participant_groups/num_of_interacted_variable_set.csv"
)
factor_columns <- c("dataset", "oracle", "search", "task", "participant_group")
interacted_var_sets_data[factor_columns] <- lapply(
  interacted_var_sets_data[factor_columns],
  as.factor
)

Train model

# Location and scale of the normal prior on the intercept. The prior below is
# written in terms of the names prior_mean / prior_sd; stanvars passes their
# values into the Stan model so those names resolve.
prior_mean <- 35.24
prior_sd <- 25.33
stanvars <- stanvar(prior_mean, name = "prior_mean") +
  stanvar(prior_sd, name = "prior_sd")

# Gaussian mixed model: oracle x search interaction plus dataset, task and
# participant group, with a random intercept per participant.
# `file` makes brm() reuse the fit saved at that path when one exists.
model_interacted_var_sets <- brm(
  formula = num_interacted_variable_set ~
    oracle * search + dataset + task + participant_group +
    (1 | participant_id),
  data = interacted_var_sets_data,
  prior = prior(normal(prior_mean, prior_sd), class = Intercept),
  stanvars = stanvars,
  chains = 2,
  cores = 2,
  iter = 2500,
  warmup = 1000,
  seed = seed,
  file = "models/interacted_var_sets_group"
)

Plot

# Draw the default brmsfit diagnostic plots for a visual convergence check.
plot(model_interacted_var_sets)

Summary

# Print posterior estimates, 95% intervals, Rhat and effective sample sizes.
summary(model_interacted_var_sets)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: num_interacted_variable_set ~ oracle * search + dataset + task + participant_group + (1 | participant_id) 
##    Data: interacted_var_sets_data (Number of observations: 132) 
## Samples: 2 chains, each with iter = 2500; warmup = 1000; thin = 1;
##          total post-warmup samples = 3000
## 
## Group-Level Effects: 
## ~participant_id (Number of levels: 66) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     6.36      1.27     3.81     8.84 1.00      950     1347
## 
## Population-Level Effects: 
##                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                   20.50      2.91    14.91    26.18 1.00     2087
## oracledziban                 0.67      3.00    -5.21     6.83 1.00     1655
## searchdfs                   -1.05      3.03    -6.94     4.90 1.00     1791
## datasetmovies                3.57      2.12    -0.60     7.56 1.00     2204
## task4.Exploration            1.90      1.48    -0.99     4.77 1.00     5462
## participant_groupstudent    -0.21      2.23    -4.54     4.35 1.00     2035
## oracledziban:searchdfs       1.07      4.31    -7.37     9.68 1.00     1732
##                          Tail_ESS
## Intercept                    2560
## oracledziban                 2373
## searchdfs                    2151
## datasetmovies                2412
## task4.Exploration            2195
## participant_groupstudent     2368
## oracledziban:searchdfs       1922
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     8.29      0.75     6.94     9.91 1.00     1127     1448
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Pairwise posterior plots of the intercept and main-effect coefficients.
# The interaction term is not included in this list.
main_effect_pars <- c(
  "b_Intercept",
  "b_datasetmovies",
  "b_oracledziban",
  "b_searchdfs",
  "b_task4.Exploration",
  "b_participant_groupstudent"
)
# fixed = TRUE matches the names literally, which matters for the "." in
# "b_task4.Exploration".
pairs(model_interacted_var_sets, pars = main_effect_pars, fixed = TRUE)

# Posterior draws of the fitted (expected) value for every observed row.
# re_formula = NA drops the participant-level random intercept.
draw_data_interacted_var_sets <- interacted_var_sets_data %>%
  add_fitted_draws(model_interacted_var_sets, seed = seed, re_formula = NA)

# One facet column per oracle/search combination.
draw_data_interacted_var_sets$condition <- paste(
  draw_data_interacted_var_sets$oracle,
  draw_data_interacted_var_sets$search
)

# alpha is a fixed opacity, so it is set on the layer rather than inside
# aes(); inside aes() it would be mapped through an alpha scale and add a
# meaningless legend entry.
plot_interacted_var_sets <- draw_data_interacted_var_sets %>%
  ggplot(aes(x = dataset, y = .value, fill = participant_group)) +
  stat_eye(.width = c(.95, .5), alpha = 0.5) +
  theme_minimal() +
  facet_grid(task ~ condition)

plot_interacted_var_sets

# NOTE(review): the directory is spelled "pariticpant_groups"; kept unchanged
# because it presumably matches the folder on disk - confirm before renaming.
ggsave(
  filename = "interacted_var_sets_split_group.png",
  plot = plot_interacted_var_sets,
  path = "../plots/posterior_draws/pariticpant_groups/num_interacted_variable_set"
)
## Saving 7 x 5 in image
# Mean with 95% and 50% quantile intervals of the fitted draws for each
# search x oracle x task x participant-group cell.
fit_info_interacted_var_sets <- mean_qi(
  group_by(draw_data_interacted_var_sets, search, oracle, task, participant_group),
  .value,
  .width = c(.95, .5)
)

fit_info_interacted_var_sets
## # A tibble: 32 x 10
## # Groups:   search, oracle, task [8]
##    search oracle task  participant_gro… .value .lower .upper .width .point
##    <fct>  <fct>  <fct> <fct>             <dbl>  <dbl>  <dbl>  <dbl> <chr> 
##  1 bfs    compa… 3. P… professional       22.3   15.7   28.9   0.95 mean  
##  2 bfs    compa… 3. P… student            22.1   15.9   28.4   0.95 mean  
##  3 bfs    compa… 4. E… professional       24.2   17.7   30.9   0.95 mean  
##  4 bfs    compa… 4. E… student            24.0   17.8   30.2   0.95 mean  
##  5 bfs    dziban 3. P… professional       23.2   16.5   29.5   0.95 mean  
##  6 bfs    dziban 3. P… student            22.7   16.6   29.0   0.95 mean  
##  7 bfs    dziban 4. E… professional       25.1   18.3   31.4   0.95 mean  
##  8 bfs    dziban 4. E… student            24.6   18.5   30.8   0.95 mean  
##  9 dfs    compa… 3. P… professional       21.5   14.9   27.8   0.95 mean  
## 10 dfs    compa… 3. P… student            21.0   14.8   27.3   0.95 mean  
## # … with 22 more rows, and 1 more variable: .interval <chr>
# Persist the interval summary as CSV.
# NOTE(review): unlike the plot outputs, this path has no participant-group
# sub-directory - confirm it does not overwrite another analysis' CSV.
write.csv(
  x = fit_info_interacted_var_sets,
  file = "../plot_data/posterior_draws/num_interacted_variable_set/interacted_var_sets.csv",
  row.names = FALSE
)

Differences between factors

# Posterior predictive draws for every observed row, without the
# participant-level random intercept (re_formula = NA).
predictive_data_interacted_var_sets <- add_predicted_draws(
  interacted_var_sets_data,
  model_interacted_var_sets,
  seed = seed,
  re_formula = NA
)

Difference in search

# Per-draw mean prediction in each search x task x dataset x group cell.
# weighted.mean() is called without weights, so this is an unweighted mean.
search_cell_means <- predictive_data_interacted_var_sets %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  summarize(value = weighted.mean(.prediction))

# Draw-wise difference between the levels of search.
diff_in_search_prediction <- search_cell_means %>%
  compare_levels(value, by = search) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Name of the comparison shown on the x axis. The column is extracted as a
# vector: `tbl[1, "search"]` would be a 1x1 tibble, which paste0() stringifies
# as a list (a factor column would come out as its integer code).
search_comparison_label <- as.character(diff_in_search_prediction[["search"]][1])

# Half-eye plot of the search difference per task, with a reference line at 0.
diff_in_search_prediction_plot <- diff_in_search_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0("Difference in interacted_var_sets (", search_comparison_label, ")")
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the factor order so the first task is drawn at the top.
  scale_y_discrete(limits = rev(levels(diff_in_search_prediction$task)))

diff_in_search_prediction_plot

ggsave(
  filename = "search_differences.png",
  plot = diff_in_search_prediction_plot,
  path = "../plots/comparisons/num_interacted_variable_set"
)
## Saving 7 x 5 in image
# Same comparison, filled by dataset. The constant 0.5 inside aes() is a
# mapping, so scale_alpha_identity() is added to use it literally as the
# opacity and to avoid a meaningless alpha legend.
diff_in_search_prediction_plot_split_by_dataset <- diff_in_search_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()

diff_in_search_prediction_plot_split_by_dataset

ggsave(
  filename = "search_differences_split_by_dataset.png",
  plot = diff_in_search_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_interacted_variable_set"
)
## Saving 7 x 5 in image

Difference in oracle

# Per-draw mean prediction in each oracle x task x dataset x group cell.
# weighted.mean() is called without weights, so this is an unweighted mean.
oracle_cell_means <- predictive_data_interacted_var_sets %>%
  group_by(oracle, task, dataset, participant_group, .draw) %>%
  summarize(value = weighted.mean(.prediction))

# Draw-wise difference between the levels of oracle.
diff_in_oracle_prediction <- oracle_cell_means %>%
  compare_levels(value, by = oracle) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'oracle', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Name of the comparison shown on the x axis. The column is extracted as a
# vector: `tbl[1, "oracle"]` would be a 1x1 tibble, which paste0() stringifies
# as a list (a factor column would come out as its integer code).
oracle_comparison_label <- as.character(diff_in_oracle_prediction[["oracle"]][1])

# Half-eye plot of the oracle difference per task, with a reference line at 0.
diff_in_oracle_prediction_plot <- diff_in_oracle_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0("Difference in interacted_var_sets (", oracle_comparison_label, ")")
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the factor order so the first task is drawn at the top.
  scale_y_discrete(limits = rev(levels(diff_in_oracle_prediction$task)))

diff_in_oracle_prediction_plot

ggsave(
  filename = "oracle_differences.png",
  plot = diff_in_oracle_prediction_plot,
  path = "../plots/comparisons/num_interacted_variable_set"
)
## Saving 7 x 5 in image
# Same comparison, filled by dataset. The constant 0.5 inside aes() is a
# mapping, so scale_alpha_identity() is added to use it literally as the
# opacity and to avoid a meaningless alpha legend.
diff_in_oracle_prediction_plot_split_by_dataset <- diff_in_oracle_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()

diff_in_oracle_prediction_plot_split_by_dataset

ggsave(
  filename = "oracle_differences_split_by_dataset.png",
  plot = diff_in_oracle_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_interacted_variable_set"
)
## Saving 7 x 5 in image

Difference in groups

# Per-draw mean prediction in each search x task x dataset x group cell.
# weighted.mean() is called without weights, so this is an unweighted mean.
# NOTE(review): `search` is a grouping column here, so group differences are
# computed within each search level - confirm this is intended and not a
# copy-paste from the search comparison.
group_cell_means <- predictive_data_interacted_var_sets %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  summarize(value = weighted.mean(.prediction))

# Draw-wise difference between the participant groups.
diff_in_group_prediction <- group_cell_means %>%
  compare_levels(value, by = participant_group) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Name of the comparison shown on the x axis. The column is extracted as a
# vector: `tbl[1, "participant_group"]` would be a 1x1 tibble, which paste0()
# stringifies as a list (a factor column would come out as its integer code).
group_comparison_label <- as.character(
  diff_in_group_prediction[["participant_group"]][1]
)

# Half-eye plot of the group difference per task, with a reference line at 0.
diff_in_group_prediction_plot <- diff_in_group_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0("Difference in interacted_var_sets (", group_comparison_label, ")")
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the factor order so the first task is drawn at the top.
  scale_y_discrete(limits = rev(levels(diff_in_group_prediction$task)))

diff_in_group_prediction_plot

ggsave(
  filename = "group_differences.png",
  plot = diff_in_group_prediction_plot,
  path = "../plots/comparisons/pariticpant_groups/num_interacted_variable_set"
)
## Saving 7 x 5 in image
# Same comparison, filled by dataset. The constant 0.5 inside aes() is a
# mapping, so scale_alpha_identity() is added to use it literally as the
# opacity and to avoid a meaningless alpha legend.
diff_in_group_prediction_plot_split_by_dataset <- diff_in_group_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()

diff_in_group_prediction_plot_split_by_dataset

ggsave(
  filename = "group_differences_split_by_dataset.png",
  plot = diff_in_group_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/pariticpant_groups/num_interacted_variable_set"
)
## Saving 7 x 5 in image

Number of Interacted Visual Designs

Read in and format data

# Load the interacted-visual-design counts and convert the design and grouping
# columns to factors so the model treats them as categorical predictors.
interacted_visual_design_data <- read.csv(
  "split_by_participant_groups/num_of_interacted_visual_design.csv"
)
factor_columns <- c("dataset", "oracle", "search", "task", "participant_group")
interacted_visual_design_data[factor_columns] <- lapply(
  interacted_visual_design_data[factor_columns],
  as.factor
)

Train model

# Location and scale of the normal prior on the intercept. The prior below is
# written in terms of the names prior_mean / prior_sd; stanvars passes their
# values into the Stan model so those names resolve.
prior_mean <- 35.24
prior_sd <- 25.33
stanvars <- stanvar(prior_mean, name = "prior_mean") +
  stanvar(prior_sd, name = "prior_sd")

# Gaussian mixed model: oracle x search interaction plus dataset, task and
# participant group, with a random intercept per participant.
# `file` makes brm() reuse the fit saved at that path when one exists.
model_interacted_visual_design <- brm(
  formula = num_interacted_visual_design ~
    oracle * search + dataset + task + participant_group +
    (1 | participant_id),
  data = interacted_visual_design_data,
  prior = prior(normal(prior_mean, prior_sd), class = Intercept),
  stanvars = stanvars,
  chains = 2,
  cores = 2,
  iter = 2500,
  warmup = 1000,
  seed = seed,
  file = "models/interacted_visual_design_group"
)

Plot

# Draw the default brmsfit diagnostic plots for a visual convergence check.
plot(model_interacted_visual_design)

Summary

# Print posterior estimates, 95% intervals, Rhat and effective sample sizes.
summary(model_interacted_visual_design)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: num_interacted_visual_design ~ oracle * search + dataset + task + participant_group + (1 | participant_id) 
##    Data: interacted_visual_design_data (Number of observations: 132) 
## Samples: 2 chains, each with iter = 2500; warmup = 1000; thin = 1;
##          total post-warmup samples = 3000
## 
## Group-Level Effects: 
## ~participant_id (Number of levels: 66) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     7.57      1.32     4.91    10.22 1.00      757      812
## 
## Population-Level Effects: 
##                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                   21.52      3.29    15.14    28.06 1.00     1371
## oracledziban                 1.58      3.45    -5.30     8.60 1.00     1140
## searchdfs                   -2.06      3.45    -8.84     4.96 1.00     1159
## datasetmovies                3.91      2.42    -0.85     8.65 1.00     1383
## task4.Exploration            1.70      1.52    -1.39     4.74 1.00     5023
## participant_groupstudent    -0.36      2.50    -5.07     4.63 1.00     1371
## oracledziban:searchdfs       1.46      4.86    -7.98    11.04 1.00     1098
##                          Tail_ESS
## Intercept                    1820
## oracledziban                 1612
## searchdfs                    1830
## datasetmovies                1719
## task4.Exploration            1731
## participant_groupstudent     1872
## oracledziban:searchdfs       1695
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     8.55      0.81     7.19    10.35 1.00      922     1462
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Pairwise posterior plots of the intercept and main-effect coefficients.
# The interaction term is not included in this list.
main_effect_pars <- c(
  "b_Intercept",
  "b_datasetmovies",
  "b_oracledziban",
  "b_searchdfs",
  "b_task4.Exploration",
  "b_participant_groupstudent"
)
# fixed = TRUE matches the names literally, which matters for the "." in
# "b_task4.Exploration".
pairs(model_interacted_visual_design, pars = main_effect_pars, fixed = TRUE)

# Posterior draws of the fitted (expected) value for every observed row.
# re_formula = NA drops the participant-level random intercept.
draw_data_interacted_visual_design <- interacted_visual_design_data %>%
  add_fitted_draws(model_interacted_visual_design, seed = seed, re_formula = NA)

# One facet column per oracle/search combination.
draw_data_interacted_visual_design$condition <- paste(
  draw_data_interacted_visual_design$oracle,
  draw_data_interacted_visual_design$search
)

# alpha is a fixed opacity, so it is set on the layer rather than inside
# aes(); inside aes() it would be mapped through an alpha scale and add a
# meaningless legend entry.
plot_interacted_visual_design <- draw_data_interacted_visual_design %>%
  ggplot(aes(x = dataset, y = .value, fill = participant_group)) +
  stat_eye(.width = c(.95, .5), alpha = 0.5) +
  theme_minimal() +
  facet_grid(task ~ condition)

plot_interacted_visual_design

# NOTE(review): the directory is spelled "pariticpant_groups"; kept unchanged
# because it presumably matches the folder on disk - confirm before renaming.
ggsave(
  filename = "interacted_visual_design_split_group.png",
  plot = plot_interacted_visual_design,
  path = "../plots/posterior_draws/pariticpant_groups/num_interacted_visual_design"
)
## Saving 7 x 5 in image
# Mean with 95% and 50% quantile intervals of the fitted draws for each
# search x oracle x task x participant-group cell.
fit_info_interacted_visual_design <- mean_qi(
  group_by(draw_data_interacted_visual_design, search, oracle, task, participant_group),
  .value,
  .width = c(.95, .5)
)

fit_info_interacted_visual_design
## # A tibble: 32 x 10
## # Groups:   search, oracle, task [8]
##    search oracle task  participant_gro… .value .lower .upper .width .point
##    <fct>  <fct>  <fct> <fct>             <dbl>  <dbl>  <dbl>  <dbl> <chr> 
##  1 bfs    compa… 3. P… professional       23.5   16.0   30.9   0.95 mean  
##  2 bfs    compa… 3. P… student            23.1   16.2   30.2   0.95 mean  
##  3 bfs    compa… 4. E… professional       25.2   17.7   32.6   0.95 mean  
##  4 bfs    compa… 4. E… student            24.8   17.8   31.8   0.95 mean  
##  5 bfs    dziban 3. P… professional       25.3   17.9   32.5   0.95 mean  
##  6 bfs    dziban 3. P… student            24.7   17.9   31.7   0.95 mean  
##  7 bfs    dziban 4. E… professional       27.0   19.8   34.0   0.95 mean  
##  8 bfs    dziban 4. E… student            26.4   19.5   33.3   0.95 mean  
##  9 dfs    compa… 3. P… professional       21.7   14.3   28.6   0.95 mean  
## 10 dfs    compa… 3. P… student            21.1   14.2   28.1   0.95 mean  
## # … with 22 more rows, and 1 more variable: .interval <chr>
# Persist the interval summary as CSV.
# NOTE(review): unlike the plot outputs, this path has no participant-group
# sub-directory - confirm it does not overwrite another analysis' CSV.
write.csv(
  x = fit_info_interacted_visual_design,
  file = "../plot_data/posterior_draws/num_interacted_visual_design/interacted_visual_design.csv",
  row.names = FALSE
)

Differences between factors

# Posterior predictive draws for every observed row, without the
# participant-level random intercept (re_formula = NA).
predictive_data_interacted_visual_design <- add_predicted_draws(
  interacted_visual_design_data,
  model_interacted_visual_design,
  seed = seed,
  re_formula = NA
)

Difference in search

# Per-draw mean prediction in each search x task x dataset x group cell.
# weighted.mean() is called without weights, so this is an unweighted mean.
search_cell_means <- predictive_data_interacted_visual_design %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  summarize(value = weighted.mean(.prediction))

# Draw-wise difference between the levels of search.
diff_in_search_prediction <- search_cell_means %>%
  compare_levels(value, by = search) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Name of the comparison shown on the x axis. The column is extracted as a
# vector: `tbl[1, "search"]` would be a 1x1 tibble, which paste0() stringifies
# as a list (a factor column would come out as its integer code).
search_comparison_label <- as.character(diff_in_search_prediction[["search"]][1])

# Half-eye plot of the search difference per task, with a reference line at 0.
diff_in_search_prediction_plot <- diff_in_search_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0("Difference in interacted_visual_design (", search_comparison_label, ")")
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the factor order so the first task is drawn at the top.
  scale_y_discrete(limits = rev(levels(diff_in_search_prediction$task)))

diff_in_search_prediction_plot

ggsave(
  filename = "search_differences.png",
  plot = diff_in_search_prediction_plot,
  path = "../plots/comparisons/num_interacted_visual_design"
)
## Saving 7 x 5 in image
# Same comparison, filled by dataset. The constant 0.5 inside aes() is a
# mapping, so scale_alpha_identity() is added to use it literally as the
# opacity and to avoid a meaningless alpha legend.
diff_in_search_prediction_plot_split_by_dataset <- diff_in_search_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()

diff_in_search_prediction_plot_split_by_dataset

ggsave(
  filename = "search_differences_split_by_dataset.png",
  plot = diff_in_search_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_interacted_visual_design"
)
## Saving 7 x 5 in image

Difference in oracle

# Per-draw mean prediction in each oracle x task x dataset x group cell.
# weighted.mean() is called without weights, so this is an unweighted mean.
oracle_cell_means <- predictive_data_interacted_visual_design %>%
  group_by(oracle, task, dataset, participant_group, .draw) %>%
  summarize(value = weighted.mean(.prediction))

# Draw-wise difference between the levels of oracle.
diff_in_oracle_prediction <- oracle_cell_means %>%
  compare_levels(value, by = oracle) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'oracle', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Name of the comparison shown on the x axis. The column is extracted as a
# vector: `tbl[1, "oracle"]` would be a 1x1 tibble, which paste0() stringifies
# as a list (a factor column would come out as its integer code).
oracle_comparison_label <- as.character(diff_in_oracle_prediction[["oracle"]][1])

# Half-eye plot of the oracle difference per task, with a reference line at 0.
diff_in_oracle_prediction_plot <- diff_in_oracle_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0("Difference in interacted_visual_design (", oracle_comparison_label, ")")
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the factor order so the first task is drawn at the top.
  scale_y_discrete(limits = rev(levels(diff_in_oracle_prediction$task)))

diff_in_oracle_prediction_plot

ggsave(
  filename = "oracle_differences.png",
  plot = diff_in_oracle_prediction_plot,
  path = "../plots/comparisons/num_interacted_visual_design"
)
## Saving 7 x 5 in image
# Same comparison, filled by dataset. The constant 0.5 inside aes() is a
# mapping, so scale_alpha_identity() is added to use it literally as the
# opacity and to avoid a meaningless alpha legend.
diff_in_oracle_prediction_plot_split_by_dataset <- diff_in_oracle_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()

diff_in_oracle_prediction_plot_split_by_dataset

ggsave(
  filename = "oracle_differences_split_by_dataset.png",
  plot = diff_in_oracle_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_interacted_visual_design"
)
## Saving 7 x 5 in image

Difference in groups

# Per-draw mean prediction in each search x task x dataset x group cell.
# weighted.mean() is called without weights, so this is an unweighted mean.
# NOTE(review): `search` is a grouping column here, so group differences are
# computed within each search level - confirm this is intended and not a
# copy-paste from the search comparison.
group_cell_means <- predictive_data_interacted_visual_design %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  summarize(value = weighted.mean(.prediction))

# Draw-wise difference between the participant groups.
diff_in_group_prediction <- group_cell_means %>%
  compare_levels(value, by = participant_group) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Name of the comparison shown on the x axis. The column is extracted as a
# vector: `tbl[1, "participant_group"]` would be a 1x1 tibble, which paste0()
# stringifies as a list (a factor column would come out as its integer code).
group_comparison_label <- as.character(
  diff_in_group_prediction[["participant_group"]][1]
)

# Half-eye plot of the group difference per task, with a reference line at 0.
diff_in_group_prediction_plot <- diff_in_group_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0("Difference in interacted_visual_design (", group_comparison_label, ")")
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the factor order so the first task is drawn at the top.
  scale_y_discrete(limits = rev(levels(diff_in_group_prediction$task)))

diff_in_group_prediction_plot

ggsave(
  filename = "group_differences.png",
  plot = diff_in_group_prediction_plot,
  path = "../plots/comparisons/pariticpant_groups/num_interacted_visual_design"
)
## Saving 7 x 5 in image
# Same comparison, filled by dataset. The constant 0.5 inside aes() is a
# mapping, so scale_alpha_identity() is added to use it literally as the
# opacity and to avoid a meaningless alpha legend.
diff_in_group_prediction_plot_split_by_dataset <- diff_in_group_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()

diff_in_group_prediction_plot_split_by_dataset

ggsave(
  filename = "group_differences_split_by_dataset.png",
  plot = diff_in_group_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/pariticpant_groups/num_interacted_visual_design"
)
## Saving 7 x 5 in image

Number of Exposed Variable Sets

Read in and format data

# Load the exposed-variable-set counts and convert the design and grouping
# columns to factors so the model treats them as categorical predictors.
exposed_variable_set_data <- read.csv(
  "split_by_participant_groups/num_of_exposed_variable_set.csv"
)
factor_columns <- c("dataset", "oracle", "search", "task", "participant_group")
exposed_variable_set_data[factor_columns] <- lapply(
  exposed_variable_set_data[factor_columns],
  as.factor
)

Train model

# Location and scale of the normal prior on the intercept. The prior below is
# written in terms of the names prior_mean / prior_sd; stanvars passes their
# values into the Stan model so those names resolve.
prior_mean <- 35.24
prior_sd <- 25.33
stanvars <- stanvar(prior_mean, name = "prior_mean") +
  stanvar(prior_sd, name = "prior_sd")

# Gaussian mixed model: oracle x search interaction plus dataset, task and
# participant group, with a random intercept per participant.
# `file` makes brm() reuse the fit saved at that path when one exists.
model_exposed_variable_set <- brm(
  formula = num_exposed_variable_set ~
    oracle * search + dataset + task + participant_group +
    (1 | participant_id),
  data = exposed_variable_set_data,
  prior = prior(normal(prior_mean, prior_sd), class = Intercept),
  stanvars = stanvars,
  chains = 2,
  cores = 2,
  iter = 2500,
  warmup = 1000,
  seed = seed,
  file = "models/exposed_variable_set_group"
)

Plot

# Draw the default brmsfit diagnostic plots for a visual convergence check.
plot(model_exposed_variable_set)

Summary

# Print posterior estimates, 95% intervals, Rhat and effective sample sizes.
summary(model_exposed_variable_set)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: num_exposed_variable_set ~ oracle * search + dataset + task + participant_group + (1 | participant_id) 
##    Data: exposed_variable_set_data (Number of observations: 132) 
## Samples: 2 chains, each with iter = 2500; warmup = 1000; thin = 1;
##          total post-warmup samples = 3000
## 
## Group-Level Effects: 
## ~participant_id (Number of levels: 66) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)    10.58      4.40     1.33    18.32 1.01      437      862
## 
## Population-Level Effects: 
##                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                   58.48      6.94    44.94    71.85 1.00     1767
## oracledziban                 0.85      7.06   -13.01    14.65 1.00     1290
## searchdfs                  -24.91      7.30   -39.44   -11.13 1.00     1304
## datasetmovies               13.63      4.90     4.24    23.51 1.00     2733
## task4.Exploration           13.01      4.22     4.85    21.19 1.00     3203
## participant_groupstudent     5.51      5.09    -4.78    15.79 1.00     1937
## oracledziban:searchdfs      22.51     10.16     3.21    43.02 1.00     1202
##                          Tail_ESS
## Intercept                    1508
## oracledziban                 1425
## searchdfs                    1192
## datasetmovies                2058
## task4.Exploration            1976
## participant_groupstudent     1809
## oracledziban:searchdfs       1805
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma    24.21      2.07    20.33    28.42 1.01      742      911
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Pairwise posterior plots of the intercept and main-effect coefficients.
# The interaction term is not included in this list.
main_effect_pars <- c(
  "b_Intercept",
  "b_datasetmovies",
  "b_oracledziban",
  "b_searchdfs",
  "b_task4.Exploration",
  "b_participant_groupstudent"
)
# fixed = TRUE matches the names literally, which matters for the "." in
# "b_task4.Exploration".
pairs(model_exposed_variable_set, pars = main_effect_pars, fixed = TRUE)

# Posterior draws of the fitted (expected) value for every observed row.
# re_formula = NA drops the participant-level random intercept.
draw_data_exposed_variable_set <- exposed_variable_set_data %>%
  add_fitted_draws(model_exposed_variable_set, seed = seed, re_formula = NA)

# One facet column per oracle/search combination.
draw_data_exposed_variable_set$condition <- paste(
  draw_data_exposed_variable_set$oracle,
  draw_data_exposed_variable_set$search
)

# alpha is a fixed opacity, so it is set on the layer rather than inside
# aes(); inside aes() it would be mapped through an alpha scale and add a
# meaningless legend entry.
plot_exposed_variable_set <- draw_data_exposed_variable_set %>%
  ggplot(aes(x = dataset, y = .value, fill = participant_group)) +
  stat_eye(.width = c(.95, .5), alpha = 0.5) +
  theme_minimal() +
  facet_grid(task ~ condition)

plot_exposed_variable_set

# NOTE(review): the directory is spelled "pariticpant_groups"; kept unchanged
# because it presumably matches the folder on disk - confirm before renaming.
ggsave(
  filename = "exposed_variable_set_split_group.png",
  plot = plot_exposed_variable_set,
  path = "../plots/posterior_draws/pariticpant_groups/num_exposed_variable_set"
)
## Saving 7 x 5 in image
# Mean with 95% and 50% quantile intervals of the fitted draws for each
# search x oracle x task x participant-group cell.
fit_info_exposed_variable_set <- mean_qi(
  group_by(draw_data_exposed_variable_set, search, oracle, task, participant_group),
  .value,
  .width = c(.95, .5)
)

fit_info_exposed_variable_set
## # A tibble: 32 x 10
## # Groups:   search, oracle, task [8]
##    search oracle task  participant_gro… .value .lower .upper .width .point
##    <fct>  <fct>  <fct> <fct>             <dbl>  <dbl>  <dbl>  <dbl> <chr> 
##  1 bfs    compa… 3. P… professional       65.3   46.9   83.2   0.95 mean  
##  2 bfs    compa… 3. P… student            70.8   53.6   88.6   0.95 mean  
##  3 bfs    compa… 4. E… professional       78.3   60.1   96.6   0.95 mean  
##  4 bfs    compa… 4. E… student            83.8   66.2  102.    0.95 mean  
##  5 bfs    dziban 3. P… professional       67.1   48.6   84.1   0.95 mean  
##  6 bfs    dziban 3. P… student            71.7   54.3   89.0   0.95 mean  
##  7 bfs    dziban 4. E… professional       80.1   61.6   97.3   0.95 mean  
##  8 bfs    dziban 4. E… student            84.7   67.2  102.    0.95 mean  
##  9 dfs    compa… 3. P… professional       41.4   23.0   58.0   0.95 mean  
## 10 dfs    compa… 3. P… student            45.9   28.8   63.0   0.95 mean  
## # … with 22 more rows, and 1 more variable: .interval <chr>
# Persist the interval summary as CSV.
# NOTE(review): unlike the plot outputs, this path has no participant-group
# sub-directory - confirm it does not overwrite another analysis' CSV.
write.csv(
  x = fit_info_exposed_variable_set,
  file = "../plot_data/posterior_draws/num_exposed_variable_set/exposed_variable_set.csv",
  row.names = FALSE
)

Differences between factors

# Posterior predictive draws for every observed row, without the
# participant-level random intercept (re_formula = NA).
predictive_data_exposed_variable_set <- add_predicted_draws(
  exposed_variable_set_data,
  model_exposed_variable_set,
  seed = seed,
  re_formula = NA
)

Difference in search

# Per-draw mean prediction in each search x task x dataset x group cell.
# weighted.mean() is called without weights, so this is an unweighted mean.
search_cell_means <- predictive_data_exposed_variable_set %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  summarize(value = weighted.mean(.prediction))

# Draw-wise difference between the levels of search.
diff_in_search_prediction <- search_cell_means %>%
  compare_levels(value, by = search) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Name of the comparison shown on the x axis. The column is extracted as a
# vector: `tbl[1, "search"]` would be a 1x1 tibble, which paste0() stringifies
# as a list (a factor column would come out as its integer code).
search_comparison_label <- as.character(diff_in_search_prediction[["search"]][1])

# Half-eye plot of the search difference per task, with a reference line at 0.
diff_in_search_prediction_plot <- diff_in_search_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0("Difference in exposed_variable_set (", search_comparison_label, ")")
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the factor order so the first task is drawn at the top.
  scale_y_discrete(limits = rev(levels(diff_in_search_prediction$task)))

diff_in_search_prediction_plot

ggsave(
  filename = "search_differences.png",
  plot = diff_in_search_prediction_plot,
  path = "../plots/comparisons/num_exposed_variable_set"
)
## Saving 7 x 5 in image
# Same comparison, filled by dataset. The constant 0.5 inside aes() is a
# mapping, so scale_alpha_identity() is added to use it literally as the
# opacity and to avoid a meaningless alpha legend.
diff_in_search_prediction_plot_split_by_dataset <- diff_in_search_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()

diff_in_search_prediction_plot_split_by_dataset

ggsave(
  filename = "search_differences_split_by_dataset.png",
  plot = diff_in_search_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_exposed_variable_set"
)
## Saving 7 x 5 in image

Difference in oracle

# Per-draw mean prediction in each oracle x task x dataset x group cell.
# weighted.mean() is called without weights, so this is an unweighted mean.
oracle_cell_means <- predictive_data_exposed_variable_set %>%
  group_by(oracle, task, dataset, participant_group, .draw) %>%
  summarize(value = weighted.mean(.prediction))

# Draw-wise difference between the levels of oracle.
diff_in_oracle_prediction <- oracle_cell_means %>%
  compare_levels(value, by = oracle) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'oracle', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Name of the comparison shown on the x axis. The column is extracted as a
# vector: `tbl[1, "oracle"]` would be a 1x1 tibble, which paste0() stringifies
# as a list (a factor column would come out as its integer code).
oracle_comparison_label <- as.character(diff_in_oracle_prediction[["oracle"]][1])

# Half-eye plot of the oracle difference per task, with a reference line at 0.
diff_in_oracle_prediction_plot <- diff_in_oracle_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0("Difference in exposed_variable_set (", oracle_comparison_label, ")")
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the factor order so the first task is drawn at the top.
  scale_y_discrete(limits = rev(levels(diff_in_oracle_prediction$task)))

diff_in_oracle_prediction_plot

ggsave(
  filename = "oracle_differences.png",
  plot = diff_in_oracle_prediction_plot,
  path = "../plots/comparisons/num_exposed_variable_set"
)
## Saving 7 x 5 in image
# Same comparison, filled by dataset. The constant 0.5 inside aes() is a
# mapping, so scale_alpha_identity() is added to use it literally as the
# opacity and to avoid a meaningless alpha legend.
diff_in_oracle_prediction_plot_split_by_dataset <- diff_in_oracle_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()

diff_in_oracle_prediction_plot_split_by_dataset

ggsave(
  filename = "oracle_differences_split_by_dataset.png",
  plot = diff_in_oracle_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_exposed_variable_set"
)
## Saving 7 x 5 in image

Difference in groups

# Per-draw mean prediction in each search x task x dataset x group cell.
# weighted.mean() is called without weights, so this is an unweighted mean.
# NOTE(review): `search` is a grouping column here, so group differences are
# computed within each search level - confirm this is intended and not a
# copy-paste from the search comparison.
group_cell_means <- predictive_data_exposed_variable_set %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  summarize(value = weighted.mean(.prediction))

# Draw-wise difference between the participant groups.
diff_in_group_prediction <- group_cell_means %>%
  compare_levels(value, by = participant_group) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Name of the comparison shown on the x axis. The column is extracted as a
# vector: `tbl[1, "participant_group"]` would be a 1x1 tibble, which paste0()
# stringifies as a list (a factor column would come out as its integer code).
group_comparison_label <- as.character(
  diff_in_group_prediction[["participant_group"]][1]
)

# Half-eye plot of the group difference per task, with a reference line at 0.
diff_in_group_prediction_plot <- diff_in_group_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0("Difference in exposed_variable_set (", group_comparison_label, ")")
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the factor order so the first task is drawn at the top.
  scale_y_discrete(limits = rev(levels(diff_in_group_prediction$task)))

diff_in_group_prediction_plot

ggsave(
  filename = "group_differences.png",
  plot = diff_in_group_prediction_plot,
  path = "../plots/comparisons/pariticpant_groups/num_exposed_variable_set"
)
## Saving 7 x 5 in image
# Same comparison, filled by dataset. The constant 0.5 inside aes() is a
# mapping, so scale_alpha_identity() is added to use it literally as the
# opacity and to avoid a meaningless alpha legend.
diff_in_group_prediction_plot_split_by_dataset <- diff_in_group_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()

diff_in_group_prediction_plot_split_by_dataset

ggsave(
  filename = "group_differences_split_by_dataset.png",
  plot = diff_in_group_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/pariticpant_groups/num_exposed_variable_set"
)
## Saving 7 x 5 in image

Number of Exposed Visual Designs

Read in and format data

# Load the exposed-visual-design counts and convert the design and grouping
# columns to factors so the model treats them as categorical predictors.
exposed_visual_design_data <- read.csv(
  "split_by_participant_groups/num_of_exposed_visual_design.csv"
)
factor_columns <- c("dataset", "oracle", "search", "task", "participant_group")
exposed_visual_design_data[factor_columns] <- lapply(
  exposed_visual_design_data[factor_columns],
  as.factor
)

Train model

# Location and scale of the normal prior on the intercept. The prior below is
# written in terms of the names prior_mean / prior_sd; stanvars passes their
# values into the Stan model so those names resolve.
prior_mean <- 35.24
prior_sd <- 25.33
stanvars <- stanvar(prior_mean, name = "prior_mean") +
  stanvar(prior_sd, name = "prior_sd")

# Gaussian mixed model: oracle x search interaction plus dataset, task and
# participant group, with a random intercept per participant.
# `file` makes brm() reuse the fit saved at that path when one exists.
# NOTE(review): this cache name starts with "_" unlike the other models'
# files; kept unchanged so an existing cached fit is still found - confirm.
model_exposed_visual_design <- brm(
  formula = num_exposed_visual_design ~
    oracle * search + dataset + task + participant_group +
    (1 | participant_id),
  data = exposed_visual_design_data,
  prior = prior(normal(prior_mean, prior_sd), class = Intercept),
  stanvars = stanvars,
  chains = 2,
  cores = 2,
  iter = 2500,
  warmup = 1000,
  seed = seed,
  file = "models/_exposed_visual_design_group"
)

Plot

# Draw the default brmsfit diagnostic plots for a visual convergence check.
plot(model_exposed_visual_design)

Summary

# Print posterior estimates, 95% intervals, Rhat and effective sample sizes.
summary(model_exposed_visual_design)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: num_exposed_visual_design ~ oracle * search + dataset + task + participant_group + (1 | participant_id) 
##    Data: exposed_visual_design_data (Number of observations: 132) 
## Samples: 2 chains, each with iter = 2500; warmup = 1000; thin = 1;
##          total post-warmup samples = 3000
## 
## Group-Level Effects: 
## ~participant_id (Number of levels: 66) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)    14.06      5.05     2.33    22.69 1.00      505      670
## 
## Population-Level Effects: 
##                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                   61.87      8.28    44.95    77.58 1.00     1694
## oracledziban                 7.37      8.71    -9.44    24.84 1.00     1379
## searchdfs                  -30.65      8.55   -47.39   -13.14 1.00     1373
## datasetmovies               14.24      6.29     1.06    26.39 1.00     1669
## task4.Exploration           15.67      5.00     5.96    25.40 1.00     3565
## participant_groupstudent     5.91      6.28    -6.30    18.06 1.00     2131
## oracledziban:searchdfs      31.43     12.35     6.88    55.92 1.00     1259
##                          Tail_ESS
## Intercept                    2080
## oracledziban                 1715
## searchdfs                    1852
## datasetmovies                1511
## task4.Exploration            2089
## participant_groupstudent     1996
## oracledziban:searchdfs       1385
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma    28.26      2.50    23.64    33.36 1.00      858     1518
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Pairwise plots for the intercept and main-effect coefficients of the model.
# The oracle:search interaction coefficient is not part of this selection.
pairs(
  model_exposed_visual_design,
  fixed = TRUE,
  pars = c(
    "b_Intercept", "b_datasetmovies", "b_oracledziban",
    "b_searchdfs", "b_task4.Exploration", "b_participant_groupstudent"
  )
)

# Fitted draws from the model for every row of the observed data.
# re_formula = NA is forwarded to the model's fitted method
# (presumably this drops the participant-level term -- confirm against brms).
draw_data_exposed_visual_design <- exposed_visual_design_data %>%
  add_fitted_draws(model_exposed_visual_design, seed = seed, re_formula = NA)

# Label each draw with its "<oracle> <search>" combination; used as the facet
# column below.
draw_data_exposed_visual_design$condition <- paste(
  draw_data_exposed_visual_design$oracle,
  draw_data_exposed_visual_design$search
)

# Eye plots of the fitted values per dataset, filled by participant group and
# faceted by task (rows) and condition (columns). The opacity is set as a
# constant on the layer: inside aes() it was treated as mapped data and
# produced a meaningless "alpha" legend entry.
plot_exposed_visual_design <- draw_data_exposed_visual_design %>%
  ggplot(aes(x = dataset, y = .value, fill = participant_group)) +
  stat_eye(.width = c(.95, .5), alpha = 0.5) +
  theme_minimal() +
  facet_grid(task ~ condition)

plot_exposed_visual_design

# `filename` is spelled out (no partial matching of `file`), and the no-op
# paste(..., sep = "") around the literal file name is removed.
# NOTE(review): "pariticpant_groups" looks misspelled; kept so the output
# location does not change.
ggsave(
  filename = "exposed_visual_design_split_group.png",
  plot = plot_exposed_visual_design,
  path = "../plots/posterior_draws/pariticpant_groups/num_exposed_visual_design"
)
## Saving 7 x 5 in image
# Mean and 95% / 50% quantile intervals of the fitted draws for each
# search x oracle x task x participant_group cell.
fit_info_exposed_visual_design <- draw_data_exposed_visual_design %>%
  group_by(search, oracle, task, participant_group) %>%
  mean_qi(.value, .width = c(.95, .5))

# Print the summary table.
fit_info_exposed_visual_design
## # A tibble: 32 x 10
## # Groups:   search, oracle, task [8]
##    search oracle task  participant_gro… .value .lower .upper .width .point
##    <fct>  <fct>  <fct> <fct>             <dbl>  <dbl>  <dbl>  <dbl> <chr> 
##  1 bfs    compa… 3. P… professional       69.0   48.1   89.3   0.95 mean  
##  2 bfs    compa… 3. P… student            74.9   55.1   95.1   0.95 mean  
##  3 bfs    compa… 4. E… professional       84.7   63.1  105.    0.95 mean  
##  4 bfs    compa… 4. E… student            90.6   70.2  111.    0.95 mean  
##  5 bfs    dziban 3. P… professional       77.4   56.1   97.4   0.95 mean  
##  6 bfs    dziban 3. P… student            82.3   62.5  102.    0.95 mean  
##  7 bfs    dziban 4. E… professional       93.0   71.9  113.    0.95 mean  
##  8 bfs    dziban 4. E… student            97.9   78.4  118.    0.95 mean  
##  9 dfs    compa… 3. P… professional       39.3   18.7   58.6   0.95 mean  
## 10 dfs    compa… 3. P… student            44.2   24.5   63.8   0.95 mean  
## # … with 22 more rows, and 1 more variable: .interval <chr>
# Write the interval summary to CSV without row names.
write.csv(
  x = fit_info_exposed_visual_design,
  file = "../plot_data/posterior_draws/num_exposed_visual_design/exposed_visual_design.csv",
  row.names = FALSE
)

Differences between factors

# Predicted draws from the model for every observed row; these feed all of the
# factor-level comparisons below.
predictive_data_exposed_visual_design <- add_predicted_draws(
  exposed_visual_design_data,
  model_exposed_visual_design,
  seed = seed,
  re_formula = NA
)

Difference in search

# Average the predicted draws within each search x task x dataset x
# participant_group cell per draw, then take the difference between search
# levels. No weights are passed to weighted.mean(), so it is an unweighted
# mean.
diff_in_search_prediction <- predictive_data_exposed_visual_design %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  summarise(value = weighted.mean(.prediction)) %>%
  compare_levels(value, by = search) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the search-level difference per task, with a dashed
# reference line at zero. The comparison label shown in the x-axis title is
# taken from the first row as a plain character scalar; indexing with
# [1, 'search'] handed paste0() a 1x1 tibble instead.
diff_in_search_prediction_plot <- diff_in_search_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in exposed_visual_design (",
      as.character(diff_in_search_prediction$search[1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the task levels on the y axis.
  scale_y_discrete(limits = rev(levels(diff_in_search_prediction$task)))

diff_in_search_prediction_plot

# `filename` is spelled out instead of the partially matched `file`.
ggsave(
  filename = "search_differences.png",
  plot = diff_in_search_prediction_plot,
  path = "../plots/comparisons/num_exposed_visual_design"
)
## Saving 7 x 5 in image
# Search-difference plot with the fill colour split by dataset. The constant
# alpha is still declared inside aes(), so scale_alpha_identity() is added to
# use 0.5 literally as the opacity and to keep it out of the legend.
diff_in_search_prediction_plot_split_by_dataset <- diff_in_search_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()

diff_in_search_prediction_plot_split_by_dataset

# `filename` is spelled out instead of the partially matched `file`.
ggsave(
  filename = "search_differences_split_by_dataset.png",
  plot = diff_in_search_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_exposed_visual_design"
)
## Saving 7 x 5 in image

Difference in oracle

# Average the predicted draws within each oracle x task x dataset x
# participant_group cell per draw, then take the difference between oracle
# levels. No weights are passed to weighted.mean(), so it is an unweighted
# mean.
diff_in_oracle_prediction <- predictive_data_exposed_visual_design %>%
  group_by(oracle, task, dataset, participant_group, .draw) %>%
  summarise(value = weighted.mean(.prediction)) %>%
  compare_levels(value, by = oracle) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'oracle', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the oracle-level difference per task, with a dashed
# reference line at zero. The comparison label shown in the x-axis title is
# taken from the first row as a plain character scalar; indexing with
# [1, 'oracle'] handed paste0() a 1x1 tibble instead.
diff_in_oracle_prediction_plot <- diff_in_oracle_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in exposed_visual_design (",
      as.character(diff_in_oracle_prediction$oracle[1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the task levels on the y axis.
  scale_y_discrete(limits = rev(levels(diff_in_oracle_prediction$task)))

diff_in_oracle_prediction_plot

# `filename` is spelled out instead of the partially matched `file`; the
# single-argument paste0() around the path was a no-op and is dropped.
ggsave(
  filename = "oracle_differences.png",
  plot = diff_in_oracle_prediction_plot,
  path = "../plots/comparisons/num_exposed_visual_design"
)
## Saving 7 x 5 in image
# Oracle-difference plot with the fill colour split by dataset. The constant
# alpha is still declared inside aes(), so scale_alpha_identity() is added to
# use 0.5 literally as the opacity and to keep it out of the legend.
diff_in_oracle_prediction_plot_split_by_dataset <- diff_in_oracle_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()

diff_in_oracle_prediction_plot_split_by_dataset

# `filename` is spelled out instead of the partially matched `file`; the
# single-argument paste0() around the path was a no-op and is dropped.
ggsave(
  filename = "oracle_differences_split_by_dataset.png",
  plot = diff_in_oracle_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/num_exposed_visual_design"
)
## Saving 7 x 5 in image

Difference in groups

# Average the predicted draws within each search x task x dataset x
# participant_group cell per draw, then take the difference between
# participant groups. No weights are passed to weighted.mean(), so it is an
# unweighted mean.
diff_in_group_prediction <- predictive_data_exposed_visual_design %>%
  group_by(search, task, dataset, participant_group, .draw) %>%
  summarise(value = weighted.mean(.prediction)) %>%
  compare_levels(value, by = participant_group) %>%
  rename(diff = value)
## `summarise()` regrouping output by 'search', 'task', 'dataset', 'participant_group' (override with `.groups` argument)
# Half-eye plot of the participant-group difference per task, with a dashed
# reference line at zero. The comparison label shown in the x-axis title is
# taken from the first row as a plain character scalar; indexing with
# [1, 'participant_group'] handed paste0() a 1x1 tibble instead.
diff_in_group_prediction_plot <- diff_in_group_prediction %>%
  ggplot(aes(x = diff, y = task)) +
  xlab(
    paste0(
      "Difference in exposed_visual_design (",
      as.character(diff_in_group_prediction$participant_group[1]),
      ")"
    )
  ) +
  ylab("Task") +
  stat_halfeye(.width = c(.95, .5)) +
  geom_vline(xintercept = 0, linetype = "longdash") +
  theme_minimal() +
  # Reverse the task levels on the y axis.
  scale_y_discrete(limits = rev(levels(diff_in_group_prediction$task)))

diff_in_group_prediction_plot

# `filename` is spelled out instead of the partially matched `file`; the
# single-argument paste0() around the path was a no-op and is dropped.
# NOTE(review): "pariticpant_groups" looks misspelled; kept so the output
# location does not change.
ggsave(
  filename = "group_differences.png",
  plot = diff_in_group_prediction_plot,
  path = "../plots/comparisons/pariticpant_groups/num_exposed_visual_design"
)
## Saving 7 x 5 in image
# Group-difference plot with the fill colour split by dataset. The constant
# alpha is still declared inside aes(), so scale_alpha_identity() is added to
# use 0.5 literally as the opacity and to keep it out of the legend.
diff_in_group_prediction_plot_split_by_dataset <- diff_in_group_prediction_plot +
  aes(fill = dataset, alpha = 0.5) +
  scale_alpha_identity()

diff_in_group_prediction_plot_split_by_dataset

# `filename` is spelled out instead of the partially matched `file`.
# NOTE(review): "pariticpant_groups" looks misspelled; kept so the output
# location does not change.
ggsave(
  filename = "group_differences_split_by_dataset.png",
  plot = diff_in_group_prediction_plot_split_by_dataset,
  path = "../plots/comparisons/pariticpant_groups/num_exposed_visual_design"
)
## Saving 7 x 5 in image
# Tag the two sets of fitted draws (created earlier in the file) so they can be
# told apart after stacking.
draw_data_exposed_variable_set$category <- "exposed"
draw_data_interacted_var_sets$category <- "interacted"

# Stack exposed and interacted variable-set draws into one table.
data_conbined <- rbind(draw_data_exposed_variable_set, draw_data_interacted_var_sets)

# Eye plots per oracle, filled by category and faceted by task (rows) and
# search (columns). The opacity is set as a constant on the layer: inside aes()
# it was treated as mapped data and produced a meaningless "alpha" legend
# entry.
plot_var_set <- data_conbined %>%
  ggplot(aes(x = oracle, y = .value, fill = category)) +
  stat_eye(.width = c(.95, .5), alpha = 0.5) +
  theme_minimal() +
  facet_grid(task ~ search) +
  ylab("Number of Elements")

plot_var_set

# `filename` is spelled out instead of the partially matched `file`.
ggsave(
  filename = "interaction_var_sets.png",
  plot = plot_var_set,
  path = "../plots/posterior_draws"
)
## Saving 7 x 5 in image
# Tag the two sets of fitted draws so they can be told apart after stacking.
# draw_data_interacted_visual_design is expected to exist from an earlier
# section of the file.
draw_data_exposed_visual_design$category <- "exposed"
draw_data_interacted_visual_design$category <- "interacted"

# Stack exposed and interacted visual-design draws into one table
# (this overwrites the data_conbined object built for the variable sets).
data_conbined <- rbind(draw_data_exposed_visual_design, draw_data_interacted_visual_design)

# Eye plots per oracle, filled by category and faceted by task (rows) and
# search (columns). The opacity is set as a constant on the layer: inside aes()
# it was treated as mapped data and produced a meaningless "alpha" legend
# entry.
plot_vis_design <- data_conbined %>%
  ggplot(aes(x = oracle, y = .value, fill = category)) +
  stat_eye(.width = c(.95, .5), alpha = 0.5) +
  theme_minimal() +
  facet_grid(task ~ search) +
  ylab("Number of Elements")

plot_vis_design

# `filename` is spelled out instead of the partially matched `file`.
ggsave(
  filename = "interaction_vis_design.png",
  plot = plot_vis_design,
  path = "../plots/posterior_draws"
)
## Saving 7 x 5 in image